* Master Do File - Analysis
* Pakistan Property Tax Project

/*******************************************************************************
	OVERVIEW
	The first part of the do file (A) compiles the cleaned data into datasets 
	that will be used in analysis.
	
	The second part of the do file (B) generates tables that appear in the paper.

*******************************************************************************/

clear all
set more off

/*******************************************************************************
* Set-up:
*******************************************************************************/

/* 	Set up the directory globals. These are shared across do files.
	Just add your directory below to get set up.

	-cap- lets the hard-coded -cd- fail silently on machines where that path
	does not exist. In that case the project root is whatever directory Stata
	is already in, so start Stata in (or cd to) the project root first. */

cap cd "C:\Users\alawther\Dropbox (MIT)\Pakistan\Public-Alyssa\"

* Show which directory was picked up as the project root, because a failed
* -cd- above is otherwise invisible.
display as text "Project root: `c(pwd)'"

* Directory globals, all derived from the project root captured once here.
* Components are joined with a single forward slash, and every global except
* PAKTAX_ANALYSIS ends in a slash so file names can be appended directly.
* Plain macro assignment (no equals sign) copies the text as-is instead of
* evaluating it as a string expression.
global PAKTAX_ANALYSIS "`c(pwd)'"
global PAPER_ANALYSIS "${PAKTAX_ANALYSIS}/Analysis/"
global DATA "${PAKTAX_ANALYSIS}/Data/"
global PAKTAX_RANDOMIZATION "${DATA}Randomization/"
global TABLE_OUTPUT "${PAPER_ANALYSIS}Tables/"
global RI_OUTPUT "${PAPER_ANALYSIS}RI/"

* Work from the Analysis folder: the -do- calls in this file use paths
* relative to it. Warn, without stopping, if the folder cannot be entered.
cap cd "${PAPER_ANALYSIS}"
if _rc {
	display as error "Warning: could not cd to ${PAPER_ANALYSIS}"
	display as error "Relative do-file paths will be resolved from `c(pwd)' instead."
}

/*******************************************************************************
* Part A. Compile cleaned data into datasets used for analysis.
*******************************************************************************/

/*******************************************************************************
****	HR
	Runs the three HR coding scripts: Phase II implementation tracking,
	Phase III ETO clusters and implementation, and staff balance statistics.
	The INPUTS / OUTPUTS notes are the authors' record of the files each
	script reads and writes.
*******************************************************************************/
* Re-anchor the working directory: the -do- paths below are relative to it.
* -cap- suppresses any error from -cd-, so a failed cd is not reported here.
cap cd "${PAPER_ANALYSIS}"
* We use HR data to determine the extent to which treatment was actually implemented
* (i.e. treatment no longer applied if a treated staff member was transferred out)

** Implementation across quarters for Phase II (year 1 of treatment)
	// INPUTS: Data/Randomization/Phase II/Randomization/110810 7861 Subreatment Randomization - Copy
	//		   Data/Randomization/Phase II/Randomization/110809 Randomization Output - Additional Circles 999 - Copy
	// OUTPUTS: Analysis/Coded Data/Phase II Balloted Circle HR Change Tracking.dta
	do "Do Files/Coding/121105 Phase II Implementation Tracking.do"

** Implementation across quarters for Phase III (year 2 of treatment)
	// INPUTS: Data/Randomization/ETO & AETO/120707 ETO Randomization Output 6465 - New List - Randomization Circles.dta
	//		   Data/Cleaned Data/Administrative Data/Punjab Compiled - Quarterly.dta
	//		   Data/Raw Data/Current HR Confirmation/Master Treated Staff (long).dta
	//	       Data/Cleaned Data/Randomization/130506 Cleaned Phase III Randomization.dta
	// OUTPUTS: Analysis/Coded Data/Phase III ETO Clusters and Implementation.dta
do "Do Files/Coding/130903 ETO Clusters and Phase III Implementation.do"


** Balance Stats for HR; number of staff at randomization, etc.
	// INPUTS:  Data/Raw Data/Current HR Confirmation/110709 Final Director Verified Master Punjab Current HR Data.csv
	// OUTPUTS: Analysis/Coded Data/HR/Staff Stats.dta
do "Do Files/Coding/130517 Balance Stats on Staff.do"


/*******************************************************************************
****	ADMIN TAX DATA
	Runs the admin-data coding scripts in order: circle clusters, treatment
	variables, then the script that compiles them with the crosswalked admin
	data. Per the INPUTS notes, the last step reads treatment_vars.dta and
	Circle Clusters.dta written by the first two, and the treatment-variable
	step reads the two HR implementation files listed as outputs in the HR
	section, so the order matters.
*******************************************************************************/
* Re-anchor the working directory: the -do- paths below are relative to it.
* -cap- suppresses any error from -cd-, so a failed cd is not reported here.
cap cd "${PAPER_ANALYSIS}"
** Clustering by robust partition
	// INPUTS: Data/Cleaned Data/Administrative Data/Crosswalk/Keys/*
	// OUTPUTS: Analysis/Coded Data/Circle Clusters.dta
do "Do Files/Coding/Circle Clusters.do"

** Treatment Variables
	// INPUTS: Analysis/Coded Data/Phase II Balloted Circle HR Change Tracking.dta
	//		   Analysis/Coded Data/Phase III ETO Clusters and Implementation.dta
	//		   Data/Cleaned Data/Randomization/130506 Cleaned Phase III Randomization.dta
	//		   Data/Cleaned Data/Administrative Data/Crosswalk/Quarterly/To Randomization/*
	// OUTPUTS: Analysis/Coded Data/treatment_vars.dta
	// The .do extension is written out like every other call in this file;
	// Stata assumes .do when it is omitted, so the same script is run.
do "Do Files/Coding/Admin Treatment Variables.do"

** Compile everything above together with crosswalked admin data
	// INPUTS:  Data/Cleaned Data/Administrative Data/Crosswalk/Quarterly/To Present - Analysis/2011_q4_crosswalk.dta
	//		    Data/Cleaned Data/Administrative Data/Crosswalk/Quarterly/To Present - Analysis/2012_q1_crosswalk.dta
	//		    Data/Cleaned Data/Administrative Data/Crosswalk/Quarterly/To Randomization/*
	//		    Analysis/Coded Data/treatment_vars.dta
	//		    Analysis/Coded Data/Circle Clusters.dta
	//		    Analysis/Coded Data/Staff Stats.dta
	//		    Analysis/Coded Data/Benchmarks/121221 Phase II Benchmarks - Full Sample - RANDOMIZATION CIRCLES.dta
	// OUTPUTS: Analysis/Coded Data/Phase III Analysis.dta
do "Do Files/Coding/Analysis Coding.do"


/*******************************************************************************
****	SURVEY DATA
	Runs the four survey coding scripts under Do Files/Coding/survey in
	order: circle clusters, treatment variables, compilation, weights.
*******************************************************************************/
* Re-anchor the working directory: the -do- paths below are relative to it.
* -cap- suppresses any error from -cd-, so a failed cd is not reported here.
cap cd "${PAPER_ANALYSIS}"	
** Clustering by robust partition in terms of survey circle IDs
	// INPUTS: Data/Cleaned Data/Administrative Data/Crosswalk/Keys/*
	// OUTPUTS: Coded Data/Survey Circle Clusters.dta
	// NOTE(review): other OUTPUTS notes in this file are written as
	// Analysis/Coded Data/...; presumably the same folder is meant here -- confirm.
do "Do Files/Coding/survey/Survey Circle Clusters.do"

** Crosswalk treatment variables to Q2 FY 2013 circles (which is what the survey data is defined in terms of)
	// TODO: inputs and outputs are not documented for this script or the two below.
do "Do Files/Coding/survey/Survey Treatment Variables.do"

** Compile it all together
do "Do Files/Coding/survey/Survey Analysis Coding.do"

** Make Weights
do "Do Files/Coding/survey/make_weights.do"

/*******************************************************************************
****	INSPECTOR SURVEY
	Runs the two inspector coding scripts: treatment variables, then
	inspector circle clusters.
*******************************************************************************/
* Re-anchor the working directory: the -do- paths below are relative to it.
* -cap- suppresses any error from -cd-, so a failed cd is not reported here.
cap cd "${PAPER_ANALYSIS}"
** Treatment variables in terms of Q4 2013 circles
	// TODO: inputs and outputs are not documented for this script.
do "Do Files/Coding/Inspector Treatment Variables.do"

** Compile it all together
	// NOTE(review): the heading above looks copied from the survey section.
	// The script name and the I/O notes below suggest this step builds the
	// inspector circle clusters rather than compiling a dataset -- confirm.
	// INPUTS: Data/Cleaned Data/Administrative Data/Crosswalk/Keys/*
	// OUTPUTS: Analysis/Coded Data/Inspector Circle Clusters
do "Do Files/Coding/Inspector Circle Clusters.do"


/*******************************************************************************
****	RANDOMIZATION INFERENCE
	Sets the iteration count and runs the rerandomization script.
*******************************************************************************/
* Re-anchor the working directory: the -do- path below is relative to it.
* -cap- suppresses any error from -cd-, so a failed cd is not reported here.
cap cd "${PAPER_ANALYSIS}"
* Randomization Inference
	// For the supervisor treatment and balance table we need to use randomization inference p-values
	// instead of clustered standard errors, because in simulations we've checked
	// and found that in small samples the regular standard errors over-reject.

* CAREFUL: This takes a long time to run.
* Number of iterations for analysis: 1000.

* iter is set as a global, presumably so that the rerandomization script (and
* possibly the RI scripts called in Part B) can read it -- confirm before
* changing the value.
global iter 1000
do "Do Files/Coding/150525 rerandomization.do"


********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************

/*******************************************************************************
* Part B: Analyze Data & Create Tables
*******************************************************************************/
* Re-anchor the working directory: the -do- paths below are relative to it.
* -cap- suppresses any error from -cd-, so a failed cd is not reported here.
cap cd "${PAPER_ANALYSIS}"

/*******************************************************************************
***		PAPER TABLES:
			Creates tables in the main paper.
*******************************************************************************/
	// The standard specification puts the INFO treatment in the controls.
	// Appendix I (online appendix) reruns the main tables with it taken out,
	// using the scripts in the info_out_controls folder.

	// Tables generated here will have a _infocontrol suffix if from the info_in_controls folder

	// The number in each heading below is presumably the table number in the
	// main paper -- confirm against the paper.

	* 1 Experimental Design
	do "Do Files/Paper/treat_implement_table.do"

	* 2 Summary Statistics
	do "Do Files/Paper/summary_stats.do"

	* 3 Impacts on Revenue Collected
	do "Do Files/Paper/info_in_controls/revenue.do"

	* 4 Impacts on Non-Revenue Outcomes
	do "Do Files/Paper/info_in_controls/nonrevenue.do"

	* 5 Impacts on Number of Reassessed Properties
	do "Do Files/Paper/info_in_controls/number_properties.do"
	
	* 6 Impacts on Tax Payments and Corruption, by Reassessed Status
	do "Do Files/Paper/info_in_controls/who_pays_more_main.do"

	* 7 Impacts on Satisfaction and Accuracy, by Reassessed Status
	do "Do Files/Paper/info_in_controls/sec9_nonrevenue.do"
	
	* 8 Selection Effects on Reassessment
	do "Do Files/Paper/info_in_controls/who_gets_reassessed_main.do" 
	
	* 9 Cost-Effectiveness of Incentives
	do "Do Files/Paper/ROI.do"


/*******************************************************************************
*** 	APPENDIX TABLES:
			Creates tables in the online appendices.
*******************************************************************************/

* Appendix F: Additional Results
* -----------------------------------------------------
* Several steps below depend on run order, as the inline comments record:
* a preparatory script (boundary cleaning, RI p-values, bootstrap) is run
* immediately before the script that uses its output.
	* Spillovers
	do "Do Files/Coding/Clean Tax Circle Boundaries.do" // Creates proximity.dta, which is an input into spillovers.do
	// Return to the Analysis folder before the next relative -do- path
	// (presumably the boundary-cleaning script changes directory -- confirm).
	cap cd "${PAPER_ANALYSIS}"
	do "Do Files/Paper/spillovers.do"
	
	* Balance
	do "Do Files/Paper/RI/130706 Randomization Balance_RI.do" // Calculate RI p-values first. CAREFUL: This takes a very long time to run.
	do "Do Files/Paper/randomization_balance.do"

	* Correlation of Satisfaction and Corruption Variables
	do "Do Files/Paper/satisfaction_correlations.do"

	* Impacts on Satisfaction with the Government
	do "Do Files/Paper/info_in_controls/satisfaction.do"

	* Impacts on Tax Base and Recovery Rates, All Treatments
	do "Do Files/Paper/info_in_controls/margins_main.do"
	
	* Change in Bribe Payments on the Intensive and Extensive Margin
	do "Do Files/Paper/extensions/bribe_intensive.do"
	
	* Impacts on Inspector Effort
	do "Do Files/Paper/info_in_controls/inspector_effort.do"
	
	* Treatment Effect on Probability of Transfer by Position
	do "Do Files/Paper/extensions/transfers_long.do"
	
	* Inspectors' Knowledge of Treatments
	do "Do Files/Paper/inspector_knowledge.do"
	
	* Inspector Beliefs
	do "Do Files/Paper/inspector_beliefs.do"
	
	* Mechanisms Beyond Price Effects
	// The first two files are needed to run the third, but they take a long time so be careful before calling
	do "Do Files/Paper/info_in_controls/income_effect_standarderror.do" // Bootstrap standard errors
	do "Do Files/Paper/RI/130920 Revenue_Supervisors_RI.do" 			// Calculate RI p-values first.
	do "Do Files/Paper/info_in_controls/additional_channels.do"
	
	* Impact of Interactions Between Supervisory and Inspector Treatments
	* RI p-values were already generated by Do Files/Paper/RI/130920 Revenue_Supervisors_RI.do (run just above)
	do "Do Files/Paper/info_in_controls/supervisors_interaction.do"

	* Impact of Treatment on Inspector Monitoring
	do "Do Files/Paper/info_in_controls/inspector_monitoring.do"
	
	* Newly Constructed Properties vs Reassessed Properties
	do "Do Files/Paper/extensions/new_vs_randomnew.do"

	* Figure: Heterogeneity in Reassessment Probability by Tax Density, Treatment vs Control
	do "Do Files/Paper/extensions/reassess_rate_bootstrap.do"  // This requires 1000 iterations, will take time
	do "Do Files/Paper/extensions/reassessment_rate_heterogeneity.do"
	
	
* Appendix G: Extensions to Main Tables and Robustness Checks
* -----------------------------------------------------
* Variants of the main summary-statistics, revenue and non-revenue tables.
* Every script except summary_stats_full.do is in the info_in_controls folder.
	* Summary Statistics, Extended Version
	do "Do Files/Paper/summary_stats_full.do"

	* Impacts on Revenue Collected, Reduced Form Estimates
	do "Do Files/Paper/info_in_controls/revenue_reducedform.do"	
	
	* Impacts on Revenue Collected, Dropping Circles with Boundary Changes
	do "Do Files/Paper/info_in_controls/revenue_nocrosswalk.do"
	
	* Impacts on Non-Revenue Outcomes, Reduced Form Estimates
	do "Do Files/Paper/info_in_controls/nonrevenue_reducedform.do"
	
	* Impacts on Non-Revenue Outcomes Controlling for Objective Property Characteristics
	do "Do Files/Paper/info_in_controls/nonrevenue_propcontrol.do"

	* Impacts on Non-Revenue Outcomes, Controlling for Perceptions of Electricity Bureau
	do "Do Files/Paper/info_in_controls/nonrevenue_eleccontrol.do"

	* Impacts on Non-Revenue Outcomes, Ordered Probit Specification
	do "Do Files/Paper/info_in_controls/nonrevenue_orderedprobit.do"
	
	
* Appendix H: Separating Effects by Subtreatment
* -----------------------------------------------------
* Runs the three _subs scripts in the info_in_controls folder. Presumably
* these are the by-subtreatment versions of who_pays_more_main,
* sec9_nonrevenue and margins_main -- confirm.
	* Impacts on Tax Payments and Corruption, by Subtreatment
	do "Do Files/Paper/info_in_controls/who_pays_more_subs.do"
	
	* Impacts on Satisfaction and Accuracy by Reassessed Status, by Subtreatment
	do "Do Files/Paper/info_in_controls/sec9_nonrevenue_subs.do"
	
	* Impact on Tax Base and Recovery Rates, by Subtreatment
	do "Do Files/Paper/info_in_controls/margins_subs.do"


* Appendix I: Information Treatment Separated from the Control Group
* --------------------------------------------------------
// Tables generated here will have a _infoout suffix
// Every script here is in the info_out_controls folder and has the same file
// name as an info_in_controls script run earlier in this file (the main paper
// tables, plus margins_main from Appendix F).
	* Impacts on Revenue Collected, Separating Information Treatment
	do "Do Files/Paper/info_out_controls/revenue.do"

	* Impacts on Non-Revenue Outcomes, Separating Information Treatment
	do "Do Files/Paper/info_out_controls/nonrevenue.do"
	 
	* Impacts on Number of Reassessed Properties, Separating Information Treatment
	do "Do Files/Paper/info_out_controls/number_properties.do"	

	* Impacts on Tax Payments and Corruption by Reassessed Status, Separating Information Treatment
	do "Do Files/Paper/info_out_controls/who_pays_more_main.do"		
	
	* Impacts on Satisfaction and Accuracy by Reassessed Status, Separating Information Treatment
	do "Do Files/Paper/info_out_controls/sec9_nonrevenue.do"	

	* Selection Effects in Reassessment, Separating Information Treatment
	do "Do Files/Paper/info_out_controls/who_gets_reassessed_main.do"

	* Impacts on Tax Base and Recovery Rates, Separating Information Treatment
	do "Do Files/Paper/info_out_controls/margins_main.do"	
	
	
* Appendix J: Dropping Revenue Plus Circles
* --------------------------------------------------------
* Every script here is in the droprevplus folder, except the RI script, which
* is in the RI folder. File names match the info_in_controls scripts run
* earlier in this file.
	* Impacts on Revenue Collected, Dropping Revenue Plus Circles
	do "Do Files/Paper/droprevplus/revenue.do"

	* Impacts on Non-Revenue Outcomes, Dropping Revenue Plus Circles
	do "Do Files/Paper/droprevplus/nonrevenue.do"
	
	* Impacts on Number of Reassessed Properties, Dropping Revenue Plus Circles
	do "Do Files/Paper/droprevplus/number_properties.do"		

	* Impacts on Tax Payments and Corruption by Reassessed Status, Dropping Revenue Plus Circles
	do "Do Files/Paper/droprevplus/who_pays_more_main.do"

	* Impacts on Satisfaction and Accuracy by Reassessed Status, Dropping Revenue Plus Circles
	do "Do Files/Paper/droprevplus/sec9_nonrevenue.do"

	* Selection Effects in Reassessment, Dropping Revenue Plus Circles
	do "Do Files/Paper/droprevplus/who_gets_reassessed_main.do"
	
	* Impacts on Tax Base and Recovery Rates, Dropping Revenue Plus Circles
	do "Do Files/Paper/droprevplus/margins_main.do"	
	
	* Mechanisms Beyond Price Effects, Dropping Revenue Plus Circles
	// Same three-step order as the Appendix F version: bootstrap, then RI
	// p-values, then the table script. Presumably additional_channels.do
	// needs the output of the two scripts before it -- confirm.
	do "Do Files/Paper/droprevplus/income_effect_standarderror.do"		// Bootstrap standard errors
	do "Do Files/Paper/RI/140801 Revenue_Supervisors_DropRevPlus_RI.do" // Calculate RI p-values first. CAREFUL: This takes a very long time to run.
	do "Do Files/Paper/droprevplus/additional_channels.do"

	
* Appendix K: Controlling for Variables from Balance Check
* --------------------------------------------------------
* Runs the two _balancecontrol scripts in the info_in_controls folder.
	* Impacts on Revenue Outcomes, Controlling for Balance Check Variables
	do "Do Files/Paper/info_in_controls/revenue_balancecontrol.do"
	
	* Impacts on Non-Revenue Outcomes, Controlling for Balance Check Variables
	do "Do Files/Paper/info_in_controls/nonrevenue_balancecontrol.do"

	
* Appendix L: Dropping GPS Sampled Properties
* --------------------------------------------------------
* Runs the two _dropfirstpoint scripts in the info_in_controls folder.
* Presumably "first point" refers to the GPS-sampled property -- confirm.
	* Impact on Tax Payments and Corruption by Reassessed Status, Dropping GPS Sampled Properties
	do "Do Files/Paper/info_in_controls/who_pays_more_main_dropfirstpoint.do"
	
	* Selection Effects in Reassessment, Dropping GPS Sampled Properties
	do "Do Files/Paper/info_in_controls/who_gets_reassessed_main_dropfirstpoint.do"
	
	
* Appendix M: Dropping Renters
* --------------------------------------------------------
* Runs two scripts in the info_in_controls folder.
* NOTE: the file-name suffixes differ (_droprenters vs _droprenter). These are
* the names as called here; do not normalize one without renaming the file.
	* Impacts on Non-Revenue Outcomes, Dropping Renters
	do "Do Files/Paper/info_in_controls/nonrevenue_droprenters.do"
	
	* Impacts on Bribes and Tax Payments, Dropping Renters
	do "Do Files/Paper/info_in_controls/who_pays_more_main_droprenter.do"	
	
	
* Appendix N: Including Both Phases of the Survey
* --------------------------------------------------------
* Runs the five _bothphase scripts in the info_in_controls folder.
	* Impacts on Non-Revenue Outcomes, Both Phases
	do "Do Files/Paper/info_in_controls/nonrevenue_bothphase.do"
	
	* Impacts on Tax Payments and Corruption by Reassessed Status, Both Phases
	do "Do Files/Paper/info_in_controls/who_pays_more_main_bothphase.do"
	
	* Impacts on Tax Payments and Corruption by Reassessed Status and Subtreatment, Both Phases
	do "Do Files/Paper/info_in_controls/who_pays_more_subs_bothphase.do"
	
	* Impacts on Satisfaction and Accuracy by Reassessed Status, Both Phases
	do "Do Files/Paper/info_in_controls/sec9_nonrevenue_bothphase.do"
	
	* Impacts on Satisfaction and Accuracy by Reassessed Status and Subtreatment, Both Phases
	do "Do Files/Paper/info_in_controls/sec9_nonrevenue_subs_bothphase.do"
